library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
hosp_activity_agesex <- read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv"))
## Rows: 129599 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (6): Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOf...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_simd <-
read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv"))
## Rows: 40894 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Le...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_agesex <- janitor::clean_names(hosp_activity_agesex)
# glimpse(hosp_activity_agesex)
hosp_activity_simd <- janitor::clean_names(hosp_activity_simd)
# glimpse(hosp_acivity_simd)
We only need ACUTE patients, what do we have:
admission_type: * Elective Inpatients (not acute) * Emergency
Inpatients
* Transfers (not acute)
* All Day cases (not acute)
* All Inpatients (not necasseraly acute)
* All Inpatients and Day cases (not acute)
* Not Specified (not acute???)
hosp_activity_agesex %>%
group_by(admission_type) %>%
summarise(total = n())
hosp_activity_simd %>%
group_by(admission_type) %>%
summarise(total = n())
removing those that aren’t acute (see above)
hosp_acute_activity_agesex <- hosp_activity_agesex %>%
filter(admission_type == "Emergency Inpatients")
hosp_acute_activity_simd <- hosp_activity_simd %>%
filter(admission_type == "Emergency Inpatients")
Age
hosp_acute_activity_agesex %>%
group_by(age) %>%
summarise(total_patients_in_age_group = n())
# 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values
hosp_acute_activity_agesex %>%
mutate(age = is.na(age)) %>%
filter(age == TRUE)
Sex
hosp_acute_activity_agesex %>%
group_by(sex) %>%
summarise(total_patients_in_sex_group = n())
# Female: 9,832 Male: 9,799
# No NA values
hosp_acute_activity_agesex %>%
mutate(sex = is.na(age)) %>%
filter(sex == TRUE)
SIMD
hosp_acute_activity_simd %>%
group_by(simd) %>%
summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis
quarter_dates <- c("2016Q3", "2016Q4", "2017Q1", "2017Q2", "2017Q3", "2017Q4",
"2018Q1", "2018Q2", "2018Q3", "2018Q4", "2019Q1", "2019Q2",
"2019Q3", "2019Q4", "2020Q1", "2020Q2", "2020Q3", "2020Q4",
"2021Q1", "2021Q2", "2021Q3")
Remember there were 7 admission types… I chose to keep one.
Might want to make use of this if we’re not sure about admission
types:
acute_target <- c("Emergency Inpatients", "Not Specified")
Others = Elective Inpatients, Transfers, All Day cases, All Inpatients, All Inpatients and Day cases, Not Specified
# do I want to use total length of stay or total average_length_of_stay
# need a total column:
# sometimes stay = 0, and length of stay = 5
hosp_acute_activity_agesex <- hosp_acute_activity_agesex %>%
group_by(sex, quarter) %>%
mutate(total_stays_per_quarter_sex = sum(stays))
hosp_acute_activity_agesex
ALSO WANT TOTALS FOR AGE BY QUARTER
hosp_acute_activity_agesex <- hosp_acute_activity_agesex %>%
group_by(age, quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
hosp_acute_activity_agesex
hosp_acute_activity_agesex %>%
ggplot()+
aes(x = quarter,
y = total_stays_per_quarter_sex,
group = sex, colour = sex) +
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Stays Across Scotland by Sex",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "Sex") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
hosp_acute_activity_agesex %>%
ggplot()+
aes(x = quarter,
y = total_stays_per_quarter_age,
group = age, colour = age) +
geom_point()+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Stays Across Scotland by Age Group",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "Age") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
SIMD (Scottish Index of Multiple Deprivation)
hosp_acute_activity_simd %>%
group_by(simd) %>%
summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis
Drop NAs
hosp_acute_activity_simd_no_na <- hosp_acute_activity_simd %>%
drop_na(simd)
hosp_acute_activity_simd_no_na
hosp_acute_activity_simd_no_na_simd <- hosp_acute_activity_simd_no_na %>%
group_by(simd, quarter) %>%
mutate(total_stays_per_quarter_simd = sum(stays))
hosp_acute_activity_simd_no_na_simd
hosp_acute_activity_simd_no_na_simd %>%
mutate(simd = fct_relevel(as.character(simd,
"1", "2", "3", "4", "5"))) %>%
ggplot()+
aes(x = quarter,
y = total_stays_per_quarter_simd,
group = simd, colour = simd) +
geom_point()+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Stays Across Scotland by SIMD Level",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
hosp_acute_activity_simd_no_na_simd <- hosp_acute_activity_simd_no_na %>%
group_by(simd, quarter) %>%
mutate(total_episodes_per_quarter_simd = sum(episodes))
hosp_acute_activity_simd_no_na_simd
hosp_acute_activity_simd_no_na_simd %>%
mutate(simd = fct_relevel(as.character(simd,
"1", "2", "3", "4", "5"))) %>%
ggplot()+
aes(x = quarter,
y = total_episodes_per_quarter_simd,
group = simd, colour = simd) +
geom_point()+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Episodes Across Scotland by SIMD Level",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))